# load packages, installing if missing
if (!require(librarian)){
install.packages("librarian")
library(librarian)
}
## Loading required package: librarian
librarian::shelf(
tidyverse, dismo, DT, here, htmltools, leaflet, mapview, purrr, raster,
readr, rgbif, rgdal, rJava, sdmpredictors, sf, spocc, tidyr, geojsonio)
##
## The 'cran_repo' argument in shelf() was not set, so it will use
## cran_repo = 'https://cran.r-project.org' by default.
##
## To avoid this message, set the 'cran_repo' argument to a CRAN
## mirror URL (see https://cran.r-project.org/mirrors.html) or set
## 'quiet = TRUE'.
## Warning: multiple methods tables found for 'crop'
## Warning: multiple methods tables found for 'extend'
select <- dplyr::select # overwrite raster::select
options(readr.show_col_type = FALSE)
# set random seed for reproducibility
set.seed(42)
# directory to store data
dir_data <- here("data/sdm")
dir.create(dir_data, showWarnings = F, recursive = TRUE)
obs_csv <- file.path(dir_data, "obs.csv")
obs_geo <- file.path(dir_data, "obs.geojson")
redo <- FALSE
if (!file.exists(obs_geo) | redo){
# get species occurrence data from GBIF with coordinates
(res <- spocc::occ(
query = 'Buteo jamaicensis',
from = 'gbif',
has_coords = T,
limit = 10000))
# extract data frame from result
df <- res$gbif$data[[1]]
readr::write_csv(df, obs_csv)
# convert to points of observation from lon/lat columns in data frame
obs <- df %>%
# limit observations to bounding box around north and central america
filter(between(longitude, -167.593385, -51.171266),
between(latitude, 5.645215, 71.374349)) %>%
sf::st_as_sf(
coords = c("longitude", "latitude"),
crs = st_crs(4326)) %>%
select(prov, key) %>% # save space (joinable from obs_csv)
distinct(geometry, .keep_all = TRUE) # remove duplicate geometries
sf::write_sf(obs, obs_geo, delete_dsn = T)
}
obs <- sf::read_sf(obs_geo)
nrow(obs) # number of rows
## [1] 9376
# show points on map
mapview::mapview(obs, map.types = "Esri.WorldStreetMap")
Question 1: There were a total of 7,004,451 observations in GBIF for the Red-tailed hawk (Buteo jamaicensis) as of 2022-01-24. Question 2: There was one point from the original dataset that was in the ocean near Europe. To fix this issues, I created a bounding box around the parts of North and Central America and used this bbox to exclude any locations outside of the Red-tailed Hawks habitat.
dir_env <- file.path(dir_data, "env")
# set a default data directory
options(sdmpredictors_datadir = dir_env)
# choosing terrestrial
env_datasets <- sdmpredictors::list_datasets(terrestrial = TRUE, marine = FALSE)
# show table of datasets
env_datasets %>%
select(dataset_code, description, citation) %>%
DT::datatable()
# choose datasets for a vector
env_datasets_vec <- c("WorldClim", "ENVIREM")
# get layers
env_layers <- sdmpredictors::list_layers(env_datasets_vec)
DT::datatable(env_layers)
# choose layers after some inspection and perhaps consulting literature
env_layers_vec <- c("WC_alt", # altitude
"WC_bio1", # annual mean temperature
"WC_bio2", # mean diurnal temperature range
"ER_tri", # terrain roughness
"WC_bio12") # annual precipitation
# get layers
env_stack <- load_layers(env_layers_vec)
# interactive plot layers, hiding all but first (select others)
# mapview(env_stack, hide = T) # makes the html too big for Github
plot(env_stack, nc = 2)
obs_hull_geo <- file.path(dir_data, "obs_hull.geojson")
env_stack_grd <- file.path(dir_data, "env_stack.grd")
if (!file.exists(obs_hull_geo) | redo){
# make convex hull around points of observation
obs_hull <- sf::st_convex_hull(st_union(obs))
# save obs hull
write_sf(obs_hull, obs_hull_geo)
}
obs_hull <- read_sf(obs_hull_geo)
# show points on map
mapview(
list(obs, obs_hull))
## Warning in cbind(`Feature ID` = fid, mat): number of rows of result is not a
## multiple of vector length (arg 1)
if (!file.exists(env_stack_grd) | redo){
obs_hull_sp <- sf::as_Spatial(obs_hull)
env_stack <- raster::mask(env_stack, obs_hull_sp) %>%
raster::crop(extent(obs_hull_sp))
writeRaster(env_stack, env_stack_grd, overwrite = T)
}
env_stack <- stack(env_stack_grd)
# show map
plot(env_stack, nc=2)
absence_geo <- file.path(dir_data, "absence.geojson")
pts_geo <- file.path(dir_data, "pts.geojson")
pts_env_csv <- file.path(dir_data, "pts_env.csv")
if (!file.exists(absence_geo) | redo){
# get raster count of observations
r_obs <- rasterize(
sf::as_Spatial(obs), env_stack[[1]], field=1, fun='count')
# create mask for
r_mask <- mask(env_stack[[1]] > -Inf, r_obs, inverse=T)
# generate random points inside mask
absence <- dismo::randomPoints(r_mask, nrow(obs)) %>%
as_tibble() %>%
st_as_sf(coords = c("x", "y"), crs = 4326)
write_sf(absence, absence_geo, delete_dsn = T)
}
absence <- read_sf(absence_geo)
# show map of presence, ie obs, and absence
mapview(obs, col.regions = "green") +
mapview(absence, col.regions = "gray")
## Warning in cbind(`Feature ID` = fid, mat): number of rows of result is not a
## multiple of vector length (arg 1)
if (!file.exists(pts_env_csv) | redo){
# combine presence and absence into single set of labeled points
pts <- rbind(
obs %>%
mutate(present = 1) %>%
select(present, key),
absence %>% mutate(present = 0,
key = NA)) %>%
mutate(ID = 1:n()) %>%
relocate(ID)
write_sf(pts, pts_geo, delete_dsn = TRUE)
# extract raster values for points
pts_env <- raster::extract(env_stack, as_Spatial(pts), df = TRUE) %>%
tibble() %>%
# join present and geometry columns to raster value results for points
left_join(pts %>% select(ID, present),
by = "ID") %>%
relocate(present, .after = ID) %>%
# extract lon, lat as single columns
mutate(lon = st_coordinates(geometry)[,1],
lat = st_coordinates(geometry)[,2]) %>%
select(-geometry)
write_csv(pts_env, pts_env_csv)
}
pts_env <- read_csv(pts_env_csv)
## Rows: 18752 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (9): ID, present, WC_alt, WC_bio1, WC_bio2, ER_tri, WC_bio12, lon, lat
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
pts_env %>%
# show first 10 presence, last 10 absence
slice(c(1:10, (nrow(pts_env)-9):nrow(pts_env))) %>%
DT::datatable(rownames = FALSE,
options = list(dom = "t",
pageLength = 20))
pts_env %>%
select(-ID) %>%
mutate(present = factor(present)) %>%
pivot_longer(-present) %>%
ggplot() +
geom_density(aes(x = value, fill = present)) +
scale_fill_manual(values = alpha(c("gray", "green"), 0.5)) +
scale_x_continuous(expand=c(0,0)) +
scale_y_continuous(expand=c(0,0)) +
theme_bw() +
facet_wrap(~name, scales = "free") +
theme(legend.position = c(1, 0),
legend.justification = c(1, 0))
## Warning: Removed 181 rows containing non-finite values (stat_density).